From bb5cc13ee9db304ccc0f558d1be6e5dd0a42017c Mon Sep 17 00:00:00 2001 From: Roland Schulz Date: Wed, 12 Sep 2018 12:58:23 -0700 Subject: [PATCH] Remove sparc64_hpc_ace nonbonded kernels Unclear whether they work after C++ change. No plans to test them. Change-Id: I5fe296ec34c51d0e9d0500eef3c6c7d5cd1b4a76 --- docs/doxygen/suppressions.txt | 2 - src/gromacs/gmxlib/nonbonded/CMakeLists.txt | 5 - .../kernelutil_sparc64_hpc_ace_double.h | 972 ----- .../make_nb_kernel_sparc64_hpc_ace_double.py | 513 --- ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 711 ---- ...wCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp | 1173 ------ ...wCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp | 2309 ------------ ...wCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp | 1329 ------- ...wCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp | 2477 ------------- ..._VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp | 635 ---- ..._VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp | 1097 ------ ..._VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp | 2233 ------------ ..._VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp | 1201 ------- ..._VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp | 2349 ------------ ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 562 --- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 1024 ------ ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 2168 ------------ ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 1024 ------ ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 2168 ------------ ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 679 ---- ...wCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp | 989 ------ ...wCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp | 1669 --------- ...wCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp | 1097 ------ ...wCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp | 1789 ---------- ..._VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp | 545 --- ..._VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp | 855 ----- ..._VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp | 1535 -------- ..._VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp | 963 ----- ..._VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp | 1655 --------- ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 480 --- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 790 ----- ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 1478 -------- ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 790 ----- ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 1478 -------- ...LJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp | 732 ---- ...LJEwSh_GeomW3P1_sparc64_hpc_ace_double.cpp | 1232 ------- ...LJEwSh_GeomW3W3_sparc64_hpc_ace_double.cpp | 2472 ------------- ...LJEwSh_GeomW4P1_sparc64_hpc_ace_double.cpp | 1390 -------- ...LJEwSh_GeomW4W4_sparc64_hpc_ace_double.cpp | 2642 -------------- ...dwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp | 672 ---- ...dwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp | 1168 ------ ...dwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp | 2406 ------------- ...dwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp | 1312 ------- ...dwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp | 2562 -------------- ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 597 ---- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 1093 ------ ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 2339 ------------ ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 1093 ------ ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 2339 ------------ ...dwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp | 759 ---- ...dwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp | 1365 ------- ...dwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp | 2933 --------------- ...dwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp | 1557 -------- ...dwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp | 3137 ----------------- ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 680 ---- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 1286 ------- ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 2862 --------------- ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 1286 ------- ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 2862 --------------- ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 740 ---- ...wCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp | 1160 ------ ...wCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp | 2170 ------------ ...wCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp | 1276 ------- ...wCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp | 2298 ------------ ...dwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp | 674 ---- ...dwLJEw_GeomW3P1_sparc64_hpc_ace_double.cpp | 1098 ------ ...dwLJEw_GeomW3W3_sparc64_hpc_ace_double.cpp | 2110 ----------- ...dwLJEw_GeomW4P1_sparc64_hpc_ace_double.cpp | 1220 ------- ...dwLJEw_GeomW4W4_sparc64_hpc_ace_double.cpp | 2244 ------------ ..._VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp | 614 ---- ..._VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp | 1034 ------ ..._VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp | 2044 ----------- ..._VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp | 1142 ------ ..._VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp | 2164 ------------ ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 549 --- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 969 ----- ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 1987 ----------- ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 969 ----- ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 1987 ----------- ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 632 ---- ...LJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp | 628 ---- ...dwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp | 574 --- ...dwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp | 552 --- ...dwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp | 636 ---- ..._VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp | 498 --- ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 733 ---- ...wCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp | 1115 ------ ...wCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp | 2011 ----------- ...wCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp | 1221 ------- ...wCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp | 2129 ----------- ...dwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp | 607 ---- ...dwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp | 989 ------ ...dwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp | 1885 ---------- ...dwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp | 1133 ------ ...dwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp | 2041 ----------- ...dwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp | 683 ---- ...dwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp | 1065 ------ ...dwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp | 1961 ----------- ...dwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp | 1213 ------- ...dwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp | 2121 ----------- ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 532 --- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 914 ----- ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 1818 ---------- ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 914 ----- ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 1818 ---------- ...wCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp | 683 ---- ...wCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp | 989 ------ ...wCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp | 1657 --------- ...wCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp | 1097 ------ ...wCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp | 1777 ---------- ..._VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp | 549 --- ..._VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp | 855 ----- ..._VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp | 1523 -------- ..._VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp | 963 ----- ..._VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp | 1643 --------- ...dwNone_GeomP1P1_sparc64_hpc_ace_double.cpp | 484 --- ...dwNone_GeomW3P1_sparc64_hpc_ace_double.cpp | 790 ----- ...dwNone_GeomW3W3_sparc64_hpc_ace_double.cpp | 1466 -------- ...dwNone_GeomW4P1_sparc64_hpc_ace_double.cpp | 790 ----- ...dwNone_GeomW4W4_sparc64_hpc_ace_double.cpp | 1466 -------- .../nb_kernel_sparc64_hpc_ace_double.cpp | 514 --- .../nb_kernel_sparc64_hpc_ace_double.h | 48 - ...kernel_template_sparc64_hpc_ace_double.pre | 1042 ------ src/gromacs/gmxlib/nonbonded/nonbonded.cpp | 11 - 124 files changed, 160739 deletions(-) delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h delete mode 100755 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCoul_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJEw_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecEw_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJEw_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecNone_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwLJ_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomP1P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW3W3_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4P1_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecRF_VdwNone_GeomW4W4_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.cpp delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h delete mode 100644 src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_template_sparc64_hpc_ace_double.pre diff --git a/docs/doxygen/suppressions.txt b/docs/doxygen/suppressions.txt index 5627a7593a..bacbd4b012 100644 --- a/docs/doxygen/suppressions.txt +++ b/docs/doxygen/suppressions.txt @@ -19,7 +19,6 @@ src/gromacs/ewald/pme-simd4.h: warning: should include "pme-simd.h" src/gromacs/ewald/pme-spline-work.cpp: warning: includes "simd.h" unnecessarily src/gromacs/ewald/pme-spline-work.h: warning: includes "simd.h" unnecessarily src/gromacs/ewald/pme-spread.cpp: warning: includes "simd.h" unnecessarily -src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: includes "simd.h" unnecessarily src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_inner.h: warning: should include "simd.h" src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_outer.h: warning: should include "simd.h" src/gromacs/mdlib/nbnxn_kernels/simd_4xn/nbnxn_kernel_simd_4xn_inner.h: warning: should include "simd.h" @@ -32,7 +31,6 @@ src/gromacs/mdlib/nbnxn_search_simd_4xn.h: warning: should include "simd.h" # These would be nice to fix, but can wait for later / deletion / rewrites src/gromacs/gmxlib/nonbonded/nb_kernel_*/*: warning: includes "config.h" unnecessarily -src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h: warning: should include "config.h" src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_gpu_ref.cpp: warning: includes "config.h" unnecessarily src/gromacs/mdlib/nbnxn_kernels/nbnxn_kernel_ref.cpp: warning: includes "config.h" unnecessarily src/gromacs/mdlib/nbnxn_kernels/simd_2xnn/nbnxn_kernel_simd_2xnn_common.h: warning: should include "config.h" diff --git a/src/gromacs/gmxlib/nonbonded/CMakeLists.txt b/src/gromacs/gmxlib/nonbonded/CMakeLists.txt index 62c40ccd34..29bae79a42 100644 --- a/src/gromacs/gmxlib/nonbonded/CMakeLists.txt +++ b/src/gromacs/gmxlib/nonbonded/CMakeLists.txt @@ -80,11 +80,6 @@ if((("${GMX_SIMD_ACTIVE}" STREQUAL "AVX_256") file(GLOB NONBONDED_AVX_256_DOUBLE_SOURCES nb_kernel_avx_256_double/*.cpp) endif() -if("${GMX_SIMD_ACTIVE}" STREQUAL "SPARC64_HPC_ACE" AND GMX_DOUBLE) - file(GLOB NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES nb_kernel_sparc64_hpc_ace_double/*.cpp) -endif() - - # These sources will be used in the parent directory's CMakeLists.txt set(NONBONDED_KERNEL_SOURCES ${NONBONDED_C_SOURCES} ${NONBONDED_SSE2_SINGLE_SOURCES} ${NONBONDED_SSE4_1_SINGLE_SOURCES} ${NONBONDED_AVX_128_FMA_SINGLE_SOURCES} ${NONBONDED_AVX_256_SINGLE_SOURCES} ${NONBONDED_SSE2_DOUBLE_SOURCES} ${NONBONDED_SSE4_1_DOUBLE_SOURCES} ${NONBONDED_AVX_128_FMA_DOUBLE_SOURCES} ${NONBONDED_AVX_256_DOUBLE_SOURCES} ${NONBONDED_SPARC64_HPC_ACE_DOUBLE_SOURCES}) target_sources(libgromacs_generated PRIVATE ${NONBONDED_KERNEL_SOURCES}) diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h deleted file mode 100644 index 32b138519a..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/kernelutil_sparc64_hpc_ace_double.h +++ /dev/null @@ -1,972 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -#ifndef _kernelutil_sparc64_hpc_ace_double_h_ -#define _kernelutil_sparc64_hpc_ace_double_h_ - -/* Get gmx_simd_exp_d() */ -#include "gromacs/simd/simd.h" -#include "gromacs/simd/simd_math.h" - -/* Fujitsu header borrows the name from SSE2, since some instructions have aliases. - * Environment/compiler version GM-1.2.0-17 seems to be buggy; when -Xg is - * defined to enable GNUC extensions, this sets _ISOC99_SOURCE, which in - * turn causes all intrinsics to be declared inline _instead_ of static. This - * leads to duplicate symbol errors at link time. - * To work around this we unset this before including the HPC-ACE header, and - * reset the value afterwards. - */ -#ifdef _ISOC99_SOURCE -# undef _ISOC99_SOURCE -# define SAVE_ISOC99_SOURCE -#endif - -#include - -#ifdef SAVE_ISOC99_SOURCE -# define _ISOC99_SOURCE -# undef SAVE_ISOC99_SOURCE -#endif - -#define GMX_FJSP_SHUFFLE2(x, y) (((x)<<1) | (y)) - -#define GMX_FJSP_TRANSPOSE2_V2R8(row0, row1) { \ - _fjsp_v2r8 __gmx_t1 = row0; \ - row0 = _fjsp_unpacklo_v2r8(row0, row1); \ - row1 = _fjsp_unpackhi_v2r8(__gmx_t1, row1); \ -} - - -static void -gmx_fjsp_print_v2r8(const char *s, _fjsp_v2r8 a) -{ - double lo, hi; - - _fjsp_storel_v2r8(&lo, a); - _fjsp_storeh_v2r8(&hi, a); - printf("%s: %g %g\n", s, lo, hi); -} - - -static _fjsp_v2r8 -gmx_fjsp_set1_v2r8(double d) -{ - return _fjsp_set_v2r8(d, d); -} - -static _fjsp_v2r8 -gmx_fjsp_load1_v2r8(const double * gmx_restrict ptr) -{ - return gmx_fjsp_set1_v2r8(*ptr); -} - - -static int -gmx_fjsp_any_lt_v2r8(_fjsp_v2r8 a, _fjsp_v2r8 b) -{ - union - { - double d; - long long int i; - } - conv; - - a = _fjsp_cmplt_v2r8(a, b); - a = _fjsp_or_v2r8(a, _fjsp_unpackhi_v2r8(a, a)); - _fjsp_storel_v2r8(&(conv.d), a); - return (conv.i != 0); -} - -/* 1.0/sqrt(x) */ -static gmx_inline _fjsp_v2r8 -gmx_fjsp_invsqrt_v2r8(_fjsp_v2r8 x) -{ - const _fjsp_v2r8 half = gmx_fjsp_set1_v2r8(0.5); - const _fjsp_v2r8 three = gmx_fjsp_set1_v2r8(3.0); - _fjsp_v2r8 lu = _fjsp_rsqrta_v2r8(x); - - lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three)); - /* The HPC-ACE instruction set is only available in double precision, while - * single precision is typically sufficient for Gromacs. If you define - * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson - * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full - * double precision (53 bits). This is still clearly higher than single precision (24 bits). - */ -#ifndef GMX_RELAXED_DOUBLE_PRECISION - lu = _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three)); -#endif - return _fjsp_mul_v2r8(_fjsp_mul_v2r8(half, lu), _fjsp_nmsub_v2r8(_fjsp_mul_v2r8(lu, lu), x, three)); -} - - -/* 1.0/x */ -static gmx_inline _fjsp_v2r8 -gmx_fjsp_inv_v2r8(_fjsp_v2r8 x) -{ - const _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - __m128d lu = _fjsp_rcpa_v2r8(x); - - /* Perform three N-R steps for double precision */ - lu = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two)); - /* The HPC-ACE instruction set is only available in double precision, while - * single precision is typically sufficient for Gromacs. If you define - * "GMX_RELAXED_DOUBLE_PRECISION" during compile, we stick to two Newton-Raphson - * iterations and accept 32bits of accuracy in 1.0/sqrt(x) and 1.0/x, rather than full - * double precision (53 bits). This is still clearly higher than single precision (24 bits). - */ -#ifndef GMX_RELAXED_DOUBLE_PRECISION - lu = _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two)); -#endif - return _fjsp_mul_v2r8(lu, _fjsp_nmsub_v2r8(lu, x, two)); -} - - -static gmx_inline _fjsp_v2r8 -gmx_fjsp_calc_rsq_v2r8(_fjsp_v2r8 dx, _fjsp_v2r8 dy, _fjsp_v2r8 dz) -{ - return _fjsp_madd_v2r8(dx, dx, _fjsp_madd_v2r8(dy, dy, _fjsp_mul_v2r8(dz, dz))); -} - -/* Normal sum of four ymm registers */ -#define gmx_fjsp_sum4_v2r8(t0, t1, t2, t3) _fjsp_add_v2r8(_fjsp_add_v2r8(t0, t1), _fjsp_add_v2r8(t2, t3)) - - - - - -static _fjsp_v2r8 -gmx_fjsp_load_2real_swizzle_v2r8(const double * gmx_restrict ptrA, - const double * gmx_restrict ptrB) -{ - return _fjsp_unpacklo_v2r8(_fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA), _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB)); -} - -static _fjsp_v2r8 -gmx_fjsp_load_1real_v2r8(const double * gmx_restrict ptrA) -{ - return _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA); -} - - -static void -gmx_fjsp_store_2real_swizzle_v2r8(double * gmx_restrict ptrA, - double * gmx_restrict ptrB, - _fjsp_v2r8 xmm1) -{ - _fjsp_v2r8 t2; - - t2 = _fjsp_unpackhi_v2r8(xmm1, xmm1); - _fjsp_storel_v2r8(ptrA, xmm1); - _fjsp_storel_v2r8(ptrB, t2); -} - -static void -gmx_fjsp_store_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1) -{ - _fjsp_storel_v2r8(ptrA, xmm1); -} - - -/* Similar to store, but increments value in memory */ -static void -gmx_fjsp_increment_2real_swizzle_v2r8(double * gmx_restrict ptrA, - double * gmx_restrict ptrB, _fjsp_v2r8 xmm1) -{ - _fjsp_v2r8 t1; - - t1 = _fjsp_unpackhi_v2r8(xmm1, xmm1); - xmm1 = _fjsp_add_v2r8(xmm1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA)); - t1 = _fjsp_add_v2r8(t1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB)); - _fjsp_storel_v2r8(ptrA, xmm1); - _fjsp_storel_v2r8(ptrB, t1); -} - -static void -gmx_fjsp_increment_1real_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 xmm1) -{ - _fjsp_v2r8 tmp; - - tmp = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA); - tmp = _fjsp_add_v2r8(tmp, xmm1); - _fjsp_storel_v2r8(ptrA, tmp); -} - - - -static gmx_inline void -gmx_fjsp_load_2pair_swizzle_v2r8(const double * gmx_restrict p1, - const double * gmx_restrict p2, - _fjsp_v2r8 * gmx_restrict c6, - _fjsp_v2r8 * gmx_restrict c12) -{ - _fjsp_v2r8 t1, t2, t3; - - /* The c6/c12 array should be aligned */ - t1 = _fjsp_load_v2r8(p1); - t2 = _fjsp_load_v2r8(p2); - *c6 = _fjsp_unpacklo_v2r8(t1, t2); - *c12 = _fjsp_unpackhi_v2r8(t1, t2); -} - -static gmx_inline void -gmx_fjsp_load_1pair_swizzle_v2r8(const double * gmx_restrict p1, - _fjsp_v2r8 * gmx_restrict c6, - _fjsp_v2r8 * gmx_restrict c12) -{ - *c6 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1); - *c12 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1); -} - - -static gmx_inline void -gmx_fjsp_load_shift_and_1rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, - const double * gmx_restrict xyz, - _fjsp_v2r8 * gmx_restrict x1, - _fjsp_v2r8 * gmx_restrict y1, - _fjsp_v2r8 * gmx_restrict z1) -{ - _fjsp_v2r8 mem_xy, mem_z, mem_sxy, mem_sz; - - mem_xy = _fjsp_load_v2r8(xyz); - mem_z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+2); - mem_sxy = _fjsp_load_v2r8(xyz_shift); - mem_sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2); - - mem_xy = _fjsp_add_v2r8(mem_xy, mem_sxy); - mem_z = _fjsp_add_v2r8(mem_z, mem_sz); - - *x1 = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(0, 0)); - *y1 = _fjsp_shuffle_v2r8(mem_xy, mem_xy, GMX_FJSP_SHUFFLE2(1, 1)); - *z1 = _fjsp_shuffle_v2r8(mem_z, mem_z, GMX_FJSP_SHUFFLE2(0, 0)); -} - - -static gmx_inline void -gmx_fjsp_load_shift_and_3rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, - const double * gmx_restrict xyz, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, sxy, sz, szx, syz; - - t1 = _fjsp_load_v2r8(xyz); - t2 = _fjsp_load_v2r8(xyz+2); - t3 = _fjsp_load_v2r8(xyz+4); - t4 = _fjsp_load_v2r8(xyz+6); - t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz+8); - - sxy = _fjsp_load_v2r8(xyz_shift); - sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2); - szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0)); - syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1)); - - t1 = _fjsp_add_v2r8(t1, sxy); - t2 = _fjsp_add_v2r8(t2, szx); - t3 = _fjsp_add_v2r8(t3, syz); - t4 = _fjsp_add_v2r8(t4, sxy); - t5 = _fjsp_add_v2r8(t5, sz); - - *x1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0)); - *y1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1)); - *z1 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0)); - *x2 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1)); - *y2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0)); - *z2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1)); - *x3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0)); - *y3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1)); - *z3 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0)); -} - - -static gmx_inline void -gmx_fjsp_load_shift_and_4rvec_broadcast_v2r8(const double * gmx_restrict xyz_shift, - const double * gmx_restrict xyz, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, - _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, sxy, sz, szx, syz; - - t1 = _fjsp_load_v2r8(xyz); - t2 = _fjsp_load_v2r8(xyz+2); - t3 = _fjsp_load_v2r8(xyz+4); - t4 = _fjsp_load_v2r8(xyz+6); - t5 = _fjsp_load_v2r8(xyz+8); - t6 = _fjsp_load_v2r8(xyz+10); - - sxy = _fjsp_load_v2r8(xyz_shift); - sz = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), xyz_shift+2); - szx = _fjsp_shuffle_v2r8(sz, sxy, GMX_FJSP_SHUFFLE2(0, 0)); - syz = _fjsp_shuffle_v2r8(sxy, sz, GMX_FJSP_SHUFFLE2(0, 1)); - - t1 = _fjsp_add_v2r8(t1, sxy); - t2 = _fjsp_add_v2r8(t2, szx); - t3 = _fjsp_add_v2r8(t3, syz); - t4 = _fjsp_add_v2r8(t4, sxy); - t5 = _fjsp_add_v2r8(t5, szx); - t6 = _fjsp_add_v2r8(t6, syz); - - *x1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(0, 0)); - *y1 = _fjsp_shuffle_v2r8(t1, t1, GMX_FJSP_SHUFFLE2(1, 1)); - *z1 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(0, 0)); - *x2 = _fjsp_shuffle_v2r8(t2, t2, GMX_FJSP_SHUFFLE2(1, 1)); - *y2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(0, 0)); - *z2 = _fjsp_shuffle_v2r8(t3, t3, GMX_FJSP_SHUFFLE2(1, 1)); - *x3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(0, 0)); - *y3 = _fjsp_shuffle_v2r8(t4, t4, GMX_FJSP_SHUFFLE2(1, 1)); - *z3 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(0, 0)); - *x4 = _fjsp_shuffle_v2r8(t5, t5, GMX_FJSP_SHUFFLE2(1, 1)); - *y4 = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(0, 0)); - *z4 = _fjsp_shuffle_v2r8(t6, t6, GMX_FJSP_SHUFFLE2(1, 1)); -} - - - -static gmx_inline void -gmx_fjsp_load_1rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, - _fjsp_v2r8 * gmx_restrict x, _fjsp_v2r8 * gmx_restrict y, _fjsp_v2r8 * gmx_restrict z) -{ - *x = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1); - *y = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1); - *z = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2); -} - -static gmx_inline void -gmx_fjsp_load_3rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) -{ - *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1); - *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1); - *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2); - *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3); - *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4); - *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5); - *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6); - *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7); - *z3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8); -} - -static gmx_inline void -gmx_fjsp_load_4rvec_1ptr_swizzle_v2r8(const double * gmx_restrict p1, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, - _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) -{ - *x1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1); - *y1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+1); - *z1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+2); - *x2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+3); - *y2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+4); - *z2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+5); - *x3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+6); - *y3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+7); - *z3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+8); - *x4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+9); - *y4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+10); - *z4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), p1+11); -} - - -static gmx_inline void -gmx_fjsp_load_1rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, - const double * gmx_restrict ptrB, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1) -{ - _fjsp_v2r8 t1, t2, t3, t4; - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrB); - t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2); - t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2); - GMX_FJSP_TRANSPOSE2_V2R8(t1, t2); - *x1 = t1; - *y1 = t2; - *z1 = _fjsp_unpacklo_v2r8(t3, t4); -} - -static gmx_inline void -gmx_fjsp_load_3rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10; - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrB); - t3 = _fjsp_load_v2r8(ptrA+2); - t4 = _fjsp_load_v2r8(ptrB+2); - t5 = _fjsp_load_v2r8(ptrA+4); - t6 = _fjsp_load_v2r8(ptrB+4); - t7 = _fjsp_load_v2r8(ptrA+6); - t8 = _fjsp_load_v2r8(ptrB+6); - t9 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8); - t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8); - GMX_FJSP_TRANSPOSE2_V2R8(t1, t2); - GMX_FJSP_TRANSPOSE2_V2R8(t3, t4); - GMX_FJSP_TRANSPOSE2_V2R8(t5, t6); - GMX_FJSP_TRANSPOSE2_V2R8(t7, t8); - *x1 = t1; - *y1 = t2; - *z1 = t3; - *x2 = t4; - *y2 = t5; - *z2 = t6; - *x3 = t7; - *y3 = t8; - *z3 = _fjsp_unpacklo_v2r8(t9, t10); -} - - -static gmx_inline void -gmx_fjsp_load_4rvec_2ptr_swizzle_v2r8(const double * gmx_restrict ptrA, const double * gmx_restrict ptrB, - _fjsp_v2r8 * gmx_restrict x1, _fjsp_v2r8 * gmx_restrict y1, _fjsp_v2r8 * gmx_restrict z1, - _fjsp_v2r8 * gmx_restrict x2, _fjsp_v2r8 * gmx_restrict y2, _fjsp_v2r8 * gmx_restrict z2, - _fjsp_v2r8 * gmx_restrict x3, _fjsp_v2r8 * gmx_restrict y3, _fjsp_v2r8 * gmx_restrict z3, - _fjsp_v2r8 * gmx_restrict x4, _fjsp_v2r8 * gmx_restrict y4, _fjsp_v2r8 * gmx_restrict z4) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6; - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrB); - t3 = _fjsp_load_v2r8(ptrA+2); - t4 = _fjsp_load_v2r8(ptrB+2); - t5 = _fjsp_load_v2r8(ptrA+4); - t6 = _fjsp_load_v2r8(ptrB+4); - GMX_FJSP_TRANSPOSE2_V2R8(t1, t2); - GMX_FJSP_TRANSPOSE2_V2R8(t3, t4); - GMX_FJSP_TRANSPOSE2_V2R8(t5, t6); - *x1 = t1; - *y1 = t2; - *z1 = t3; - *x2 = t4; - *y2 = t5; - *z2 = t6; - t1 = _fjsp_load_v2r8(ptrA+6); - t2 = _fjsp_load_v2r8(ptrB+6); - t3 = _fjsp_load_v2r8(ptrA+8); - t4 = _fjsp_load_v2r8(ptrB+8); - t5 = _fjsp_load_v2r8(ptrA+10); - t6 = _fjsp_load_v2r8(ptrB+10); - GMX_FJSP_TRANSPOSE2_V2R8(t1, t2); - GMX_FJSP_TRANSPOSE2_V2R8(t3, t4); - GMX_FJSP_TRANSPOSE2_V2R8(t5, t6); - *x3 = t1; - *y3 = t2; - *z3 = t3; - *x4 = t4; - *y4 = t5; - *z4 = t6; -} - - -static void -gmx_fjsp_decrement_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1) -{ - _fjsp_v2r8 t1, t2, t3; - - t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA); - t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1); - t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2); - - t1 = _fjsp_sub_v2r8(t1, x1); - t2 = _fjsp_sub_v2r8(t2, y1); - t3 = _fjsp_sub_v2r8(t3, z1); - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storel_v2r8(ptrA+1, t2); - _fjsp_storel_v2r8(ptrA+2, t3); -} - -static void -gmx_fjsp_decrement_fma_1rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, _fjsp_v2r8 fscal, - _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1) -{ - _fjsp_v2r8 t1, t2, t3; - - t1 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA); - t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+1); - t3 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2); - - t1 = _fjsp_nmsub_v2r8(fscal, dx1, t1); - t2 = _fjsp_nmsub_v2r8(fscal, dy1, t2); - t3 = _fjsp_nmsub_v2r8(fscal, dz1, t3); - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storel_v2r8(ptrA+1, t2); - _fjsp_storel_v2r8(ptrA+2, t3); -} - - -static void -gmx_fjsp_decrement_3rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, - _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, - _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrA+2); - t3 = _fjsp_load_v2r8(ptrA+4); - t4 = _fjsp_load_v2r8(ptrA+6); - t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8); - - x1 = _fjsp_unpacklo_v2r8(x1, y1); - z1 = _fjsp_unpacklo_v2r8(z1, x2); - y2 = _fjsp_unpacklo_v2r8(y2, z2); - x3 = _fjsp_unpacklo_v2r8(x3, y3); - /* nothing to be done for z3 */ - - t1 = _fjsp_sub_v2r8(t1, x1); - t2 = _fjsp_sub_v2r8(t2, z1); - t3 = _fjsp_sub_v2r8(t3, y2); - t4 = _fjsp_sub_v2r8(t4, x3); - t5 = _fjsp_sub_v2r8(t5, z3); - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storeh_v2r8(ptrA+1, t1); - _fjsp_storel_v2r8(ptrA+2, t2); - _fjsp_storeh_v2r8(ptrA+3, t2); - _fjsp_storel_v2r8(ptrA+4, t3); - _fjsp_storeh_v2r8(ptrA+5, t3); - _fjsp_storel_v2r8(ptrA+6, t4); - _fjsp_storeh_v2r8(ptrA+7, t4); - _fjsp_storel_v2r8(ptrA+8, t5); -} - - -static void -gmx_fjsp_decrement_4rvec_1ptr_swizzle_v2r8(double * gmx_restrict ptrA, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, - _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, - _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3, - _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrA+2); - t3 = _fjsp_load_v2r8(ptrA+4); - t4 = _fjsp_load_v2r8(ptrA+6); - t5 = _fjsp_load_v2r8(ptrA+8); - t6 = _fjsp_load_v2r8(ptrA+10); - - x1 = _fjsp_unpacklo_v2r8(x1, y1); - z1 = _fjsp_unpacklo_v2r8(z1, x2); - y2 = _fjsp_unpacklo_v2r8(y2, z2); - x3 = _fjsp_unpacklo_v2r8(x3, y3); - z3 = _fjsp_unpacklo_v2r8(z3, x4); - y4 = _fjsp_unpacklo_v2r8(y4, z4); - - _fjsp_storel_v2r8(ptrA, _fjsp_sub_v2r8( t1, x1 )); - _fjsp_storeh_v2r8(ptrA+1, _fjsp_sub_v2r8( t1, x1 )); - _fjsp_storel_v2r8(ptrA+2, _fjsp_sub_v2r8( t2, z1 )); - _fjsp_storeh_v2r8(ptrA+3, _fjsp_sub_v2r8( t2, z1 )); - _fjsp_storel_v2r8(ptrA+4, _fjsp_sub_v2r8( t3, y2 )); - _fjsp_storeh_v2r8(ptrA+5, _fjsp_sub_v2r8( t3, y2 )); - _fjsp_storel_v2r8(ptrA+6, _fjsp_sub_v2r8( t4, x3 )); - _fjsp_storeh_v2r8(ptrA+7, _fjsp_sub_v2r8( t4, x3 )); - _fjsp_storel_v2r8(ptrA+8, _fjsp_sub_v2r8( t5, z3 )); - _fjsp_storeh_v2r8(ptrA+9, _fjsp_sub_v2r8( t5, z3 )); - _fjsp_storel_v2r8(ptrA+10, _fjsp_sub_v2r8( t6, y4 )); - _fjsp_storeh_v2r8(ptrA+11, _fjsp_sub_v2r8( t6, y4 )); -} - -static void -gmx_fjsp_decrement_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2); - t3 = _fjsp_load_v2r8(ptrB); - t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2); - - t5 = _fjsp_unpacklo_v2r8(x1, y1); - t6 = _fjsp_unpackhi_v2r8(x1, y1); - t7 = _fjsp_unpackhi_v2r8(z1, z1); - - t1 = _fjsp_sub_v2r8(t1, t5); - t2 = _fjsp_sub_v2r8(t2, z1); - - t3 = _fjsp_sub_v2r8(t3, t6); - t4 = _fjsp_sub_v2r8(t4, t7); - - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storeh_v2r8(ptrA+1, t1); - _fjsp_storel_v2r8(ptrA+2, t2); - _fjsp_storel_v2r8(ptrB, t3); - _fjsp_storeh_v2r8(ptrB+1, t3); - _fjsp_storel_v2r8(ptrB+2, t4); -} - - -static void -gmx_fjsp_decrement_fma_1rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, - _fjsp_v2r8 fscal, _fjsp_v2r8 dx1, _fjsp_v2r8 dy1, _fjsp_v2r8 dz1) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, fscalA, fscalB; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+2); - t3 = _fjsp_load_v2r8(ptrB); - t4 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+2); - fscalA = _fjsp_unpacklo_v2r8(fscal, fscal); - fscalB = _fjsp_unpackhi_v2r8(fscal, fscal); - - t5 = _fjsp_unpacklo_v2r8(dx1, dy1); - t6 = _fjsp_unpackhi_v2r8(dx1, dy1); - t7 = _fjsp_unpackhi_v2r8(dz1, dz1); - - t1 = _fjsp_nmsub_v2r8(fscalA, t5, t1); - t2 = _fjsp_nmsub_v2r8(fscalA, dz1, t2); - - t3 = _fjsp_nmsub_v2r8(fscalB, t6, t3); - t4 = _fjsp_nmsub_v2r8(fscalB, t7, t4); - - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storeh_v2r8(ptrA+1, t1); - _fjsp_storel_v2r8(ptrA+2, t2); - _fjsp_storel_v2r8(ptrB, t3); - _fjsp_storeh_v2r8(ptrB+1, t3); - _fjsp_storel_v2r8(ptrB+2, t4); -} - - -static void -gmx_fjsp_decrement_3rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, - _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, - _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10; - _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrA+2); - t3 = _fjsp_load_v2r8(ptrA+4); - t4 = _fjsp_load_v2r8(ptrA+6); - t5 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA+8); - t6 = _fjsp_load_v2r8(ptrB); - t7 = _fjsp_load_v2r8(ptrB+2); - t8 = _fjsp_load_v2r8(ptrB+4); - t9 = _fjsp_load_v2r8(ptrB+6); - t10 = _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB+8); - - tA = _fjsp_unpacklo_v2r8(x1, y1); - tB = _fjsp_unpackhi_v2r8(x1, y1); - tC = _fjsp_unpacklo_v2r8(z1, x2); - tD = _fjsp_unpackhi_v2r8(z1, x2); - tE = _fjsp_unpacklo_v2r8(y2, z2); - tF = _fjsp_unpackhi_v2r8(y2, z2); - tG = _fjsp_unpacklo_v2r8(x3, y3); - tH = _fjsp_unpackhi_v2r8(x3, y3); - tI = _fjsp_unpackhi_v2r8(z3, z3); - - t1 = _fjsp_sub_v2r8(t1, tA); - t2 = _fjsp_sub_v2r8(t2, tC); - t3 = _fjsp_sub_v2r8(t3, tE); - t4 = _fjsp_sub_v2r8(t4, tG); - t5 = _fjsp_sub_v2r8(t5, z3); - - t6 = _fjsp_sub_v2r8(t6, tB); - t7 = _fjsp_sub_v2r8(t7, tD); - t8 = _fjsp_sub_v2r8(t8, tF); - t9 = _fjsp_sub_v2r8(t9, tH); - t10 = _fjsp_sub_v2r8(t10, tI); - - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storeh_v2r8(ptrA+1, t1); - _fjsp_storel_v2r8(ptrA+2, t2); - _fjsp_storeh_v2r8(ptrA+3, t2); - _fjsp_storel_v2r8(ptrA+4, t3); - _fjsp_storeh_v2r8(ptrA+5, t3); - _fjsp_storel_v2r8(ptrA+6, t4); - _fjsp_storeh_v2r8(ptrA+7, t4); - _fjsp_storel_v2r8(ptrA+8, t5); - _fjsp_storel_v2r8(ptrB, t6); - _fjsp_storeh_v2r8(ptrB+1, t6); - _fjsp_storel_v2r8(ptrB+2, t7); - _fjsp_storeh_v2r8(ptrB+3, t7); - _fjsp_storel_v2r8(ptrB+4, t8); - _fjsp_storeh_v2r8(ptrB+5, t8); - _fjsp_storel_v2r8(ptrB+6, t9); - _fjsp_storeh_v2r8(ptrB+7, t9); - _fjsp_storel_v2r8(ptrB+8, t10); -} - - -static void -gmx_fjsp_decrement_4rvec_2ptr_swizzle_v2r8(double * gmx_restrict ptrA, double * gmx_restrict ptrB, - _fjsp_v2r8 x1, _fjsp_v2r8 y1, _fjsp_v2r8 z1, - _fjsp_v2r8 x2, _fjsp_v2r8 y2, _fjsp_v2r8 z2, - _fjsp_v2r8 x3, _fjsp_v2r8 y3, _fjsp_v2r8 z3, - _fjsp_v2r8 x4, _fjsp_v2r8 y4, _fjsp_v2r8 z4) -{ - _fjsp_v2r8 t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12; - _fjsp_v2r8 tA, tB, tC, tD, tE, tF, tG, tH, tI, tJ, tK, tL; - - t1 = _fjsp_load_v2r8(ptrA); - t2 = _fjsp_load_v2r8(ptrA+2); - t3 = _fjsp_load_v2r8(ptrA+4); - t4 = _fjsp_load_v2r8(ptrA+6); - t5 = _fjsp_load_v2r8(ptrA+8); - t6 = _fjsp_load_v2r8(ptrA+10); - t7 = _fjsp_load_v2r8(ptrB); - t8 = _fjsp_load_v2r8(ptrB+2); - t9 = _fjsp_load_v2r8(ptrB+4); - t10 = _fjsp_load_v2r8(ptrB+6); - t11 = _fjsp_load_v2r8(ptrB+8); - t12 = _fjsp_load_v2r8(ptrB+10); - - tA = _fjsp_unpacklo_v2r8(x1, y1); - tB = _fjsp_unpackhi_v2r8(x1, y1); - tC = _fjsp_unpacklo_v2r8(z1, x2); - tD = _fjsp_unpackhi_v2r8(z1, x2); - tE = _fjsp_unpacklo_v2r8(y2, z2); - tF = _fjsp_unpackhi_v2r8(y2, z2); - tG = _fjsp_unpacklo_v2r8(x3, y3); - tH = _fjsp_unpackhi_v2r8(x3, y3); - tI = _fjsp_unpacklo_v2r8(z3, x4); - tJ = _fjsp_unpackhi_v2r8(z3, x4); - tK = _fjsp_unpacklo_v2r8(y4, z4); - tL = _fjsp_unpackhi_v2r8(y4, z4); - - t1 = _fjsp_sub_v2r8(t1, tA); - t2 = _fjsp_sub_v2r8(t2, tC); - t3 = _fjsp_sub_v2r8(t3, tE); - t4 = _fjsp_sub_v2r8(t4, tG); - t5 = _fjsp_sub_v2r8(t5, tI); - t6 = _fjsp_sub_v2r8(t6, tK); - - t7 = _fjsp_sub_v2r8(t7, tB); - t8 = _fjsp_sub_v2r8(t8, tD); - t9 = _fjsp_sub_v2r8(t9, tF); - t10 = _fjsp_sub_v2r8(t10, tH); - t11 = _fjsp_sub_v2r8(t11, tJ); - t12 = _fjsp_sub_v2r8(t12, tL); - - _fjsp_storel_v2r8(ptrA, t1); - _fjsp_storeh_v2r8(ptrA+1, t1); - _fjsp_storel_v2r8(ptrA+2, t2); - _fjsp_storeh_v2r8(ptrA+3, t2); - _fjsp_storel_v2r8(ptrA+4, t3); - _fjsp_storeh_v2r8(ptrA+5, t3); - _fjsp_storel_v2r8(ptrA+6, t4); - _fjsp_storeh_v2r8(ptrA+7, t4); - _fjsp_storel_v2r8(ptrA+8, t5); - _fjsp_storeh_v2r8(ptrA+9, t5); - _fjsp_storel_v2r8(ptrA+10, t6); - _fjsp_storeh_v2r8(ptrA+11, t6); - _fjsp_storel_v2r8(ptrB, t7); - _fjsp_storeh_v2r8(ptrB+1, t7); - _fjsp_storel_v2r8(ptrB+2, t8); - _fjsp_storeh_v2r8(ptrB+3, t8); - _fjsp_storel_v2r8(ptrB+4, t9); - _fjsp_storeh_v2r8(ptrB+5, t9); - _fjsp_storel_v2r8(ptrB+6, t10); - _fjsp_storeh_v2r8(ptrB+7, t10); - _fjsp_storel_v2r8(ptrB+8, t11); - _fjsp_storeh_v2r8(ptrB+9, t11); - _fjsp_storel_v2r8(ptrB+10, t12); - _fjsp_storeh_v2r8(ptrB+11, t12); -} - - - -static gmx_inline void -gmx_fjsp_update_iforce_1atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, - double * gmx_restrict fptr, - double * gmx_restrict fshiftptr) -{ - __m128d t1, t2, t3, t4; - - /* transpose data */ - t1 = fix1; - fix1 = _fjsp_unpacklo_v2r8(fix1, fiy1); /* y0 x0 */ - fiy1 = _fjsp_unpackhi_v2r8(t1, fiy1); /* y1 x1 */ - - fix1 = _fjsp_add_v2r8(fix1, fiy1); - fiz1 = _fjsp_add_v2r8( fiz1, _fjsp_unpackhi_v2r8(fiz1, fiz1 )); - - t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); - _fjsp_storel_v2r8( fptr, t4 ); - _fjsp_storeh_v2r8( fptr+1, t4 ); - _fjsp_storel_v2r8( fptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+2), fiz1 )); - - t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); - _fjsp_storel_v2r8( fshiftptr, t4 ); - _fjsp_storeh_v2r8( fshiftptr+1, t4 ); - _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 )); -} - -static gmx_inline void -gmx_fjsp_update_iforce_3atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, - _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2, - _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3, - double * gmx_restrict fptr, - double * gmx_restrict fshiftptr) -{ - __m128d t1, t2, t3, t4, t5, t6; - - /* transpose data */ - GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1); - GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2); - GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2); - t1 = fix3; - fix3 = _fjsp_unpacklo_v2r8(fix3, fiy3); /* y0 x0 */ - fiy3 = _fjsp_unpackhi_v2r8(t1, fiy3); /* y1 x1 */ - - fix1 = _fjsp_add_v2r8(fix1, fiy1); - fiz1 = _fjsp_add_v2r8(fiz1, fix2); - fiy2 = _fjsp_add_v2r8(fiy2, fiz2); - - fix3 = _fjsp_add_v2r8(fix3, fiy3); - fiz3 = _fjsp_add_v2r8( fiz3, _fjsp_unpackhi_v2r8(fiz3, fiz3)); - - t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); - t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 ); - t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 ); - t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 ); - - _fjsp_storel_v2r8( fptr, t3 ); - _fjsp_storeh_v2r8( fptr+1, t3 ); - _fjsp_storel_v2r8( fptr+2, t4 ); - _fjsp_storeh_v2r8( fptr+3, t4 ); - _fjsp_storel_v2r8( fptr+4, t5 ); - _fjsp_storeh_v2r8( fptr+5, t5 ); - _fjsp_storel_v2r8( fptr+6, t6 ); - _fjsp_storeh_v2r8( fptr+7, t6 ); - _fjsp_storel_v2r8( fptr+8, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fptr+8), fiz3 )); - - fix1 = _fjsp_add_v2r8(fix1, fix3); - t1 = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1)); - fix1 = _fjsp_add_v2r8(fix1, t1); /* x and y sums */ - - t2 = _fjsp_shuffle_v2r8(fiy2, fiy2, GMX_FJSP_SHUFFLE2(1, 1)); - fiz1 = _fjsp_add_v2r8(fiz1, fiz3); - fiz1 = _fjsp_add_v2r8(fiz1, t2); /* z sum */ - - t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); - _fjsp_storel_v2r8( fshiftptr, t3 ); - _fjsp_storeh_v2r8( fshiftptr+1, t3 ); - _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 )); -} - - -static gmx_inline void -gmx_fjsp_update_iforce_4atom_swizzle_v2r8(_fjsp_v2r8 fix1, _fjsp_v2r8 fiy1, _fjsp_v2r8 fiz1, - _fjsp_v2r8 fix2, _fjsp_v2r8 fiy2, _fjsp_v2r8 fiz2, - _fjsp_v2r8 fix3, _fjsp_v2r8 fiy3, _fjsp_v2r8 fiz3, - _fjsp_v2r8 fix4, _fjsp_v2r8 fiy4, _fjsp_v2r8 fiz4, - double * gmx_restrict fptr, - double * gmx_restrict fshiftptr) -{ - __m128d t1, t2, t3, t4, t5, t6, t7, t8; - - /* transpose data */ - GMX_FJSP_TRANSPOSE2_V2R8(fix1, fiy1); - GMX_FJSP_TRANSPOSE2_V2R8(fiz1, fix2); - GMX_FJSP_TRANSPOSE2_V2R8(fiy2, fiz2); - GMX_FJSP_TRANSPOSE2_V2R8(fix3, fiy3); - GMX_FJSP_TRANSPOSE2_V2R8(fiz3, fix4); - GMX_FJSP_TRANSPOSE2_V2R8(fiy4, fiz4); - - fix1 = _fjsp_add_v2r8(fix1, fiy1); - fiz1 = _fjsp_add_v2r8(fiz1, fix2); - fiy2 = _fjsp_add_v2r8(fiy2, fiz2); - fix3 = _fjsp_add_v2r8(fix3, fiy3); - fiz3 = _fjsp_add_v2r8(fiz3, fix4); - fiy4 = _fjsp_add_v2r8(fiy4, fiz4); - - t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr), fix1 ); - t4 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+2), fiz1 ); - t5 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+4), fiy2 ); - t6 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+6), fix3 ); - t7 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+8), fiz3 ); - t8 = _fjsp_add_v2r8( _fjsp_load_v2r8(fptr+10), fiy4 ); - _fjsp_storel_v2r8( fptr, t3 ); - _fjsp_storeh_v2r8( fptr+1, t3 ); - _fjsp_storel_v2r8( fptr+2, t4 ); - _fjsp_storeh_v2r8( fptr+3, t4 ); - _fjsp_storel_v2r8( fptr+4, t5 ); - _fjsp_storeh_v2r8( fptr+5, t5 ); - _fjsp_storel_v2r8( fptr+6, t6 ); - _fjsp_storeh_v2r8( fptr+7, t6 ); - _fjsp_storel_v2r8( fptr+8, t7 ); - _fjsp_storeh_v2r8( fptr+9, t7 ); - _fjsp_storel_v2r8( fptr+10, t8 ); - _fjsp_storeh_v2r8( fptr+11, t8 ); - - t1 = _fjsp_shuffle_v2r8(fiz1, fiy2, GMX_FJSP_SHUFFLE2(0, 1)); - fix1 = _fjsp_add_v2r8(fix1, t1); - t2 = _fjsp_shuffle_v2r8(fiz3, fiy4, GMX_FJSP_SHUFFLE2(0, 1)); - fix3 = _fjsp_add_v2r8(fix3, t2); - fix1 = _fjsp_add_v2r8(fix1, fix3); /* x and y sums */ - - fiz1 = _fjsp_add_v2r8(fiz1, _fjsp_unpackhi_v2r8(fiy2, fiy2)); - fiz3 = _fjsp_add_v2r8(fiz3, _fjsp_unpackhi_v2r8(fiy4, fiy4)); - fiz1 = _fjsp_add_v2r8(fiz1, fiz3); /* z sum */ - - t3 = _fjsp_add_v2r8( _fjsp_load_v2r8(fshiftptr), fix1 ); - _fjsp_storel_v2r8( fshiftptr, t3 ); - _fjsp_storeh_v2r8( fshiftptr+1, t3 ); - _fjsp_storel_v2r8( fshiftptr+2, _fjsp_add_v2r8( _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), fshiftptr+2), fiz1 )); -} - - - -static gmx_inline void -gmx_fjsp_update_1pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA) -{ - pot1 = _fjsp_add_v2r8(pot1, _fjsp_unpackhi_v2r8(pot1, pot1)); - _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA))); -} - -static gmx_inline void -gmx_fjsp_update_2pot_v2r8(_fjsp_v2r8 pot1, double * gmx_restrict ptrA, - _fjsp_v2r8 pot2, double * gmx_restrict ptrB) -{ - GMX_FJSP_TRANSPOSE2_V2R8(pot1, pot2); - pot1 = _fjsp_add_v2r8(pot1, pot2); - pot2 = _fjsp_unpackhi_v2r8(pot1, pot1); - - _fjsp_storel_v2r8(ptrA, _fjsp_add_v2r8(pot1, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrA))); - _fjsp_storel_v2r8(ptrB, _fjsp_add_v2r8(pot2, _fjsp_loadl_v2r8(_fjsp_setzero_v2r8(), ptrB))); -} - - -#endif /* _kernelutil_sparc64_hpc_ace_double_h_ */ diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py deleted file mode 100755 index d49e1cac78..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/make_nb_kernel_sparc64_hpc_ace_double.py +++ /dev/null @@ -1,513 +0,0 @@ -#!/usr/bin/env python2 -# -# This file is part of the GROMACS molecular simulation package. -# -# Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by -# Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, -# and including many others, as listed in the AUTHORS file in the -# top-level source directory and at http://www.gromacs.org. -# -# GROMACS is free software; you can redistribute it and/or -# modify it under the terms of the GNU Lesser General Public License -# as published by the Free Software Foundation; either version 2.1 -# of the License, or (at your option) any later version. -# -# GROMACS is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -# Lesser General Public License for more details. -# -# You should have received a copy of the GNU Lesser General Public -# License along with GROMACS; if not, see -# http://www.gnu.org/licenses, or write to the Free Software Foundation, -# Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -# -# If you want to redistribute modifications to GROMACS, please -# consider that scientific software is very special. Version -# control is crucial - bugs must be traceable. We will be happy to -# consider code for inclusion in the official distribution, but -# derived work must not be called official GROMACS. Details are found -# in the README & COPYING files - if they are missing, get the -# official version at http://www.gromacs.org. -# -# To help us fund GROMACS development, we humbly ask that you cite -# the research papers on the package. Check out http://www.gromacs.org. - -import sys -import os -sys.path.append("../preprocessor") -sys.path.append("../../../../../admin") -from copyright import create_copyright_header -from gmxpreprocess import gmxpreprocess - -# "The happiest programs are programs that write other programs." -# -# -# This script controls the generation of Gromacs nonbonded kernels. -# -# We no longer generate kernels on-the-fly, so this file is not run -# during a Gromacs compile - only when we need to update the kernels (=rarely). -# -# To maximize performance, each combination of interactions in Gromacs -# has a separate nonbonded kernel without conditionals in the code. -# To avoid writing hundreds of different routines for each architecture, -# we instead use a custom preprocessor so we can encode the conditionals -# and expand for-loops (e.g, for water-water interactions) -# from a general kernel template. While that file will contain quite a -# few preprocessor directives, it is still an order of magnitude easier -# to maintain than ~200 different kernels (not to mention it avoids bugs). -# -# To actually generate the kernels, this program iteratively calls the -# preprocessor with different define settings corresponding to all -# combinations of coulomb/van-der-Waals/geometry options. -# -# A main goal in the design was to make this new generator _general_. For -# this reason we have used a lot of different fields to identify a particular -# kernel and interaction. Basically, each kernel will have a name like -# -# nbkernel_ElecXX_VdwYY_GeomZZ_VF_QQ() -# -# Where XX/YY/ZZ/VF are strings to identify what the kernel computes. -# -# Elec/Vdw describe the type of interaction for electrostatics and van der Waals. -# The geometry settings correspond e.g. to water-water or water-particle kernels, -# and finally the VF setting is V,F,or VF depending on whether we calculate -# only the potential, only the force, or both of them. The final string (QQ) -# is the architecture/language/optimization of the kernel. -# -Arch = 'sparc64_hpc_ace_double' - -# Explanation of the 'properties': -# -# It is cheap to compute r^2, and the kernels require various other functions of r for -# different kinds of interaction. Depending on the needs of the kernel and the available -# processor instructions, this will be done in different ways. -# -# 'rinv' means we need 1/r, which is calculated as 1/sqrt(r^2). -# 'rinvsq' means we need 1/(r*r). This is calculated as rinv*rinv if we already did rinv, otherwise 1/r^2. -# 'r' is similarly calculated as r^2*rinv when needed -# 'table' means the interaction is tabulated, in which case we will calculate a table index before the interaction -# 'shift' means the interaction will be modified by a constant to make it zero at the cutoff. -# 'cutoff' means the interaction is set to 0.0 outside the cutoff -# - -FileHeader = create_copyright_header('2012,2013,2014,2015,2017,2018') -FileHeader += """/* - * Note: this file was generated by the GROMACS """+Arch+""" kernel generator. - */ -""" - -############################################### -# ELECTROSTATICS -# Interactions and flags for them -############################################### -ElectrostaticsList = { - 'None' : [], - 'Coulomb' : ['rinv','rinvsq'], - 'ReactionField' : ['rinv','rinvsq'], - 'CubicSplineTable' : ['rinv','r','table'], - 'Ewald' : ['rinv','rinvsq','r'], -} - - -############################################### -# VAN DER WAALS -# Interactions and flags for them -############################################### -VdwList = { - 'None' : [], - 'LennardJones' : ['rinvsq'], -# 'Buckingham' : ['rinv','rinvsq','r'], # Disabled for sse4.1 to reduce number of kernels and simply the template - 'CubicSplineTable' : ['rinv','r','table'], - 'LJEwald' : ['rinv','rinvsq','r'], -} - - -############################################### -# MODIFIERS -# Different ways to adjust/modify interactions to conserve energy -############################################### -ModifierList = { - 'None' : [], - 'ExactCutoff' : ['exactcutoff'], # Zero the interaction outside the cutoff, used for reaction-field-zero - 'PotentialShift' : ['shift','exactcutoff'], - 'PotentialSwitch' : ['rinv','r','switch','exactcutoff'] -} - - -############################################### -# GEOMETRY COMBINATIONS -############################################### -GeometryNameList = [ - [ 'Particle' , 'Particle' ], - [ 'Water3' , 'Particle' ], - [ 'Water3' , 'Water3' ], - [ 'Water4' , 'Particle' ], - [ 'Water4' , 'Water4' ] -] - - -############################################### -# POTENTIAL / FORCE -############################################### -VFList = [ - 'PotentialAndForce', -# 'Potential', # Not used yet - 'Force' -] - - -############################################### -# GEOMETRY PROPERTIES -############################################### -# Dictionaries with lists telling which interactions are present -# 1,2,3 means particles 1,2,3 (but not 0) have electrostatics! -GeometryElectrostatics = { - 'Particle' : [ 0 ], - 'Particle2' : [ 0 , 1 ], - 'Particle3' : [ 0 , 1 , 2 ], - 'Particle4' : [ 0 , 1 , 2 , 3 ], - 'Water3' : [ 0 , 1 , 2 ], - 'Water4' : [ 1 , 2 , 3 ] -} - -GeometryVdw = { - 'Particle' : [ 0 ], - 'Particle2' : [ 0 , 1 ], - 'Particle3' : [ 0 , 1 , 2 ], - 'Particle4' : [ 0 , 1 , 2 , 3 ], - 'Water3' : [ 0 ], - 'Water4' : [ 0 ] -} - - - - -# Dictionary to abbreviate all strings (mixed from all the lists) -Abbreviation = { - 'None' : 'None', - 'Coulomb' : 'Coul', - 'Ewald' : 'Ew', - 'ReactionField' : 'RF', - 'CubicSplineTable' : 'CSTab', - 'LennardJones' : 'LJ', - 'Buckingham' : 'Bham', - 'LJEwald' : 'LJEw', - 'PotentialShift' : 'Sh', - 'PotentialSwitch' : 'Sw', - 'ExactCutoff' : 'Cut', - 'PotentialAndForce' : 'VF', - 'Potential' : 'V', - 'Force' : 'F', - 'Water3' : 'W3', - 'Water4' : 'W4', - 'Particle' : 'P1', - 'Particle2' : 'P2', - 'Particle3' : 'P3', - 'Particle4' : 'P4' -} - - -############################################### -# Functions -############################################### - -# Return a string with the kernel name from current settings -def MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom): - ElecStr = 'Elec' + Abbreviation[KernelElec] - if(KernelElecMod!='None'): - ElecStr = ElecStr + Abbreviation[KernelElecMod] - VdwStr = 'Vdw' + Abbreviation[KernelVdw] - if(KernelVdwMod!='None'): - VdwStr = VdwStr + Abbreviation[KernelVdwMod] - GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]] - return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + Arch - -def MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): - ElecStr = 'Elec' + Abbreviation[KernelElec] - if(KernelElecMod!='None'): - ElecStr = ElecStr + Abbreviation[KernelElecMod] - VdwStr = 'Vdw' + Abbreviation[KernelVdw] - if(KernelVdwMod!='None'): - VdwStr = VdwStr + Abbreviation[KernelVdwMod] - GeomStr = 'Geom' + Abbreviation[KernelGeom[0]] + Abbreviation[KernelGeom[1]] - VFStr = Abbreviation[KernelVF] - return 'nb_kernel_' + ElecStr + '_' + VdwStr + '_' + GeomStr + '_' + VFStr + '_' + Arch - -# Return a string with a declaration to use for the kernel; -# this will be a sequence of string combinations as well as the actual function name -# Dont worry about field widths - that is just pretty-printing for the header! -def MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF): - KernelStr = '\"'+KernelName+'\"' - ArchStr = '\"'+Arch+'\"' - ElecStr = '\"'+KernelElec+'\"' - ElecModStr = '\"'+KernelElecMod+'\"' - VdwStr = '\"'+KernelVdw+'\"' - VdwModStr = '\"'+KernelVdwMod+'\"' - GeomStr = '\"'+KernelGeom[0]+KernelGeom[1]+'\"' - OtherStr = '\"'+KernelOther+'\"' - VFStr = '\"'+KernelVF+'\"' - - ThisSpec = ArchStr+', '+ElecStr+', '+ElecModStr+', '+VdwStr+', '+VdwModStr+', '+GeomStr+', '+OtherStr+', '+VFStr - ThisDecl = ' { '+KernelName+', '+KernelStr+', '+ThisSpec+' }' - return ThisDecl - - -# Returns 1 if this kernel should be created, 0 if we should skip it -# This routine is not critical - it is not the end of the world if we create more kernels, -# but since the number is pretty large we save both space and compile-time by reducing it a bit. -def KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): - - # No need for kernels without interactions - if(KernelElec=='None' and KernelVdw=='None'): - return 0 - - # No need for modifiers without interactions - if((KernelElec=='None' and KernelElecMod!='None') or (KernelVdw=='None' and KernelVdwMod!='None')): - return 0 - - # No need for LJ-only water optimization, or water optimization with implicit solvent. - if('Water' in KernelGeom[0] and KernelElec=='None'): - return 0 - - # Non-matching table settings are pointless - if( ('Table' in KernelElec) and ('Table' in KernelVdw) and KernelElec!=KernelVdw ): - return 0 - - # Try to reduce the number of different switch/shift options to get a reasonable number of kernels - # For electrostatics, reaction-field can use 'exactcutoff', and ewald can use switch or shift. - if(KernelElecMod=='ExactCutoff' and KernelElec!='ReactionField'): - return 0 - if(KernelElecMod in ['PotentialShift','PotentialSwitch'] and KernelElec!='Ewald'): - return 0 - # For Vdw, we support switch and shift for Lennard-Jones/Buckingham - if((KernelVdwMod=='ExactCutoff') or - (KernelVdwMod in ['PotentialShift','PotentialSwitch'] and KernelVdw not in ['LennardJones','Buckingham','LJEwald'])): - return 0 - - # For LJEwald, we only support shift - if(KernelVdw=='LJEwald' and KernelVdwMod=='PotentialSwitch'): - return 0 - - # Choose either switch or shift and don't mix them... - if((KernelElecMod=='PotentialShift' and KernelVdwMod=='PotentialSwitch') or - (KernelElecMod=='PotentialSwitch' and KernelVdwMod=='PotentialShift')): - return 0 - - # Don't use a Vdw kernel with a modifier if the electrostatics one does not have one - if(KernelElec!='None' and KernelElecMod=='None' and KernelVdwMod!='None'): - return 0 - - # Don't use an electrostatics kernel with a modifier if the vdw one does not have one, - # unless the electrostatics one is reaction-field with exact cutoff. - if(KernelVdw!='None' and KernelVdwMod=='None' and KernelElecMod!='None'): - if(KernelElec=='ReactionField' and KernelVdw!='CubicSplineTable'): - return 0 - elif(KernelElec!='ReactionField'): - return 0 - - #Only do LJ-PME if we are also doing PME for electrostatics, or no electrostatics at all. - if(KernelVdw=='LJEwald' and KernelElec not in ['Ewald','None']): - return 0 - - return 1 - - - -# -# The preprocessor will automatically expand the interactions for water and other -# geometries inside the kernel, but to get this right we need to setup a couple -# of defines - we do them in a separate routine to keep the main loop clean. -# -# While this routine might look a bit complex it is actually quite straightforward, -# and the best news is that you wont have to modify _anything_ for a new geometry -# as long as you correctly define its Electrostatics/Vdw geometry in the lists above! -# -def SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines): - # What is the _name_ for the i/j group geometry? - igeometry = KernelGeom[0] - jgeometry = KernelGeom[1] - # define so we can access it in the source when the preprocessor runs - defines['GEOMETRY_I'] = igeometry - defines['GEOMETRY_J'] = jgeometry - - # For the i/j groups, extract a python list of which sites have electrostatics - # For SPC/TIP3p this will be [1,1,1], while TIP4p (no elec on first site) will be [0,1,1,1] - ielec = GeometryElectrostatics[igeometry] - jelec = GeometryElectrostatics[jgeometry] - # Zero out the corresponding lists in case we dont do Elec - if(KernelElec=='None'): - ielec = [] - jelec = [] - - # Extract similar interaction lists for Vdw interactions (example for SPC: [1,0,0]) - iVdw = GeometryVdw[igeometry] - jVdw = GeometryVdw[jgeometry] - - # Zero out the corresponding lists in case we dont do Vdw - if(KernelVdw=='None'): - iVdw = [] - jVdw = [] - - # iany[] and jany[] contains lists of the particles actually used (for interactions) in this kernel - iany = list(set(ielec+iVdw)) # convert to+from set to make elements unique - jany = list(set(jelec+jVdw)) - - defines['PARTICLES_ELEC_I'] = ielec - defines['PARTICLES_ELEC_J'] = jelec - defines['PARTICLES_VDW_I'] = iVdw - defines['PARTICLES_VDW_J'] = jVdw - defines['PARTICLES_I'] = iany - defines['PARTICLES_J'] = jany - - # elecij,Vdwij are sets with pairs of particles for which the corresponding interaction is done - # (and anyij again corresponds to either electrostatics or Vdw) - elecij = [] - Vdwij = [] - anyij = [] - - for i in ielec: - for j in jelec: - elecij.append([i,j]) - - for i in iVdw: - for j in jVdw: - Vdwij.append([i,j]) - - for i in iany: - for j in jany: - if [i,j] in elecij or [i,j] in Vdwij: - anyij.append([i,j]) - - defines['PAIRS_IJ'] = anyij - - # Make an 2d list-of-distance-properties-to-calculate for i,j - ni = max(iany)+1 - nj = max(jany)+1 - # Each element properties[i][j] is an empty list - properties = [ [ [] for j in range(0,nj) ] for i in range (0,ni) ] - # Add properties to each set - for i in range(0,ni): - for j in range(0,nj): - if [i,j] in elecij: - properties[i][j] = properties[i][j] + ['electrostatics'] + ElectrostaticsList[KernelElec] + ModifierList[KernelElecMod] - if [i,j] in Vdwij: - properties[i][j] = properties[i][j] + ['vdw'] + VdwList[KernelVdw] + ModifierList[KernelVdwMod] - # Add rinv if we need r - if 'r' in properties[i][j]: - properties[i][j] = properties[i][j] + ['rinv'] - # Add rsq if we need rinv or rinsq - if 'rinv' in properties[i][j] or 'rinvsq' in properties[i][j]: - properties[i][j] = properties[i][j] + ['rsq'] - - defines['INTERACTION_FLAGS'] = properties - - - -def PrintStatistics(ratio): - ratio = 100.0*ratio - print '\rGenerating %s nonbonded kernels... %5.1f%%' % (Arch,ratio), - sys.stdout.flush() - - - -defines = {} -kerneldecl = [] - -cnt = 0.0 -nelec = len(ElectrostaticsList) -nVdw = len(VdwList) -nmod = len(ModifierList) -ngeom = len(GeometryNameList) - -ntot = nelec*nmod*nVdw*nmod*ngeom - -numKernels = 0 - -fpdecl = open('nb_kernel_' + Arch + '.cpp','w') -fpdecl.write( FileHeader ) -fpdecl.write( '#include "gmxpre.h"\n\n' ) -fpdecl.write( '#include "gromacs/gmxlib/nonbonded/nb_kernel.h"\n\n' ) - -for KernelElec in ElectrostaticsList: - defines['KERNEL_ELEC'] = KernelElec - - for KernelElecMod in ModifierList: - defines['KERNEL_MOD_ELEC'] = KernelElecMod - - for KernelVdw in VdwList: - defines['KERNEL_VDW'] = KernelVdw - - for KernelVdwMod in ModifierList: - defines['KERNEL_MOD_VDW'] = KernelVdwMod - - for KernelGeom in GeometryNameList: - - cnt += 1 - KernelFilename = MakeKernelFileName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom) + '.cpp' - fpkernel = open(KernelFilename,'w') - defines['INCLUDE_HEADER'] = 1 # Include header first time in new file - DoHeader = 1 - - for KernelVF in VFList: - - KernelName = MakeKernelName(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF) - - defines['KERNEL_NAME'] = KernelName - defines['KERNEL_VF'] = KernelVF - - # Check if this is a valid/sane/usable combination - if not KeepKernel(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF): - continue; - - # The overall kernel settings determine what the _kernel_ calculates, but for the water - # kernels this does not mean that every pairwise interaction has e.g. Vdw interactions. - # This routine sets defines of what to calculate for each pair of particles in those cases. - SetDefines(KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelVF,defines) - - if(DoHeader==1): - fpkernel.write( FileHeader ) - - gmxpreprocess('nb_kernel_template_' + Arch + '.pre', KernelName+'.tmp' , defines, force=1,contentType='C') - numKernels = numKernels + 1 - - defines['INCLUDE_HEADER'] = 0 # Header has been included once now - DoHeader=0 - - # Append temp file contents to the common kernelfile - fptmp = open(KernelName+'.tmp','r') - fpkernel.writelines(fptmp.readlines()) - fptmp.close() - os.remove(KernelName+'.tmp') - - # Add an extern declaration for this kernel - fpdecl.write('extern nb_kernel_t ' + KernelName + ';\n'); - - # Add declaration to the buffer - KernelOther='' - kerneldecl.append(MakeKernelDecl(KernelName,KernelElec,KernelElecMod,KernelVdw,KernelVdwMod,KernelGeom,KernelOther,KernelVF)) - - filesize = fpkernel.tell() - fpkernel.close() - if(filesize==0): - os.remove(KernelFilename) - - PrintStatistics(cnt/ntot) - pass - pass - pass - pass -pass - -# Write out the list of settings and corresponding kernels to the declaration file -fpdecl.write( '\n\n' ) -fpdecl.write( 'nb_kernel_info_t\n' ) -fpdecl.write( ' kernellist_'+Arch+'[] =\n' ) -fpdecl.write( '{\n' ) -for decl in kerneldecl[0:-1]: - fpdecl.write( decl + ',\n' ) -fpdecl.write( kerneldecl[-1] + '\n' ) -fpdecl.write( '};\n\n' ) -fpdecl.write( 'int\n' ) -fpdecl.write( ' kernellist_'+Arch+'_size = sizeof(kernellist_'+Arch+')/sizeof(kernellist_'+Arch+'[0]);\n') -fpdecl.close() diff --git a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp b/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp deleted file mode 100644 index 24aca65925..0000000000 --- a/src/gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_sparc64_hpc_ace_double.cpp +++ /dev/null @@ -1,711 +0,0 @@ -/* - * This file is part of the GROMACS molecular simulation package. - * - * Copyright (c) 2012,2013,2014,2015,2017,2018, by the GROMACS development team, led by - * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl, - * and including many others, as listed in the AUTHORS file in the - * top-level source directory and at http://www.gromacs.org. - * - * GROMACS is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public License - * as published by the Free Software Foundation; either version 2.1 - * of the License, or (at your option) any later version. - * - * GROMACS is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with GROMACS; if not, see - * http://www.gnu.org/licenses, or write to the Free Software Foundation, - * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. - * - * If you want to redistribute modifications to GROMACS, please - * consider that scientific software is very special. Version - * control is crucial - bugs must be traceable. We will be happy to - * consider code for inclusion in the official distribution, but - * derived work must not be called official GROMACS. Details are found - * in the README & COPYING files - if they are missing, get the - * official version at http://www.gromacs.org. - * - * To help us fund GROMACS development, we humbly ask that you cite - * the research papers on the package. Check out http://www.gromacs.org. - */ -/* - * Note: this file was generated by the GROMACS sparc64_hpc_ace_double kernel generator. - */ -#include "gmxpre.h" - -#include "config.h" - -#include - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*76); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*171); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*444); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*200); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*476); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*59); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*427); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*176); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*452); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*46); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*141); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*414); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*141); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*414); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: CubicSplineTable - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCSTab_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*66); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*131); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*314); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*155); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*341); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*43); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*108); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*291); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*131); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*317); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*31); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*96); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*279); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*96); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*279); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Coulomb - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecCoul_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*79); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*180); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_01; - _fjsp_v2r8 c6grid_02; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*471); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_01; - _fjsp_v2r8 c6grid_02; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_30; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*209); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_30; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_13; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - _fjsp_v2r8 c6grid_23; - _fjsp_v2r8 c6grid_31; - _fjsp_v2r8 c6grid_32; - _fjsp_v2r8 c6grid_33; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*503); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJEwSh_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_13; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - _fjsp_v2r8 c6grid_23; - _fjsp_v2r8 c6grid_31; - _fjsp_v2r8 c6grid_32; - _fjsp_v2r8 c6grid_33; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*67); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*168); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*459); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*488); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*49); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*150); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*441); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*150); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*441); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSh_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*86); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*225); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*630); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*269); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*677); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*68); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*207); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*612); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*207); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*612); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEwSw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*78); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*169); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*430); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*194); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*458); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*68); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*159); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_01; - _fjsp_v2r8 c6grid_02; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*420); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_01; - _fjsp_v2r8 c6grid_02; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_30; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*185); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_10; - _fjsp_v2r8 c6grid_20; - _fjsp_v2r8 c6grid_30; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_13; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - _fjsp_v2r8 c6grid_23; - _fjsp_v2r8 c6grid_31; - _fjsp_v2r8 c6grid_32; - _fjsp_v2r8 c6grid_33; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*449); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LJEwald - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJEw_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - _fjsp_v2r8 c6grid_11; - _fjsp_v2r8 c6grid_12; - _fjsp_v2r8 c6grid_13; - _fjsp_v2r8 c6grid_21; - _fjsp_v2r8 c6grid_22; - _fjsp_v2r8 c6grid_23; - _fjsp_v2r8 c6grid_31; - _fjsp_v2r8 c6grid_32; - _fjsp_v2r8 c6grid_33; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - c6grid_00 = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset0+vdwjidx0A]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*56); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*147); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*408); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*170); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*434); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*44); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*135); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*396); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*135); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecEw_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*396); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: Ewald - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecEw_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*59); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwLJEwSh_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwLJEw_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*50); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LJEwald - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwLJEw_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 c6grid_00; - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*44); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*62); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - rcutoff_scalar = fr->ic->rvdw; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecNone_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 7 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_VDW_VF,outeriter*7 + inneriter*35); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: None - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecNone_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*75); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*156); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*387); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*179); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*413); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*57); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*138); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*369); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*164); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*398); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*73); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*154); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*385); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*182); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*416); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*39); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*120); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*351); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*120); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*351); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRFCut_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*70); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*143); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*350); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*167); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*377); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: CubicSplineTable - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwCSTab_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwLJ_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 9 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_VF,outeriter*9 + inneriter*47); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwLJ_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3_VF,outeriter*20 + inneriter*120); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 20 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W3W3_VF,outeriter*20 + inneriter*327); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - vdwjidx0A = 2*vdwtype[inr+0]; - qq00 = _fjsp_mul_v2r8(iq0,jq0); - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4_VF,outeriter*26 + inneriter*143); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 26 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW_W4W4_VF,outeriter*26 + inneriter*353); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: LennardJones - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwLJ_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - vdwioffset0 = 2*nvdwtype*vdwtype[inr+0]; - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - vdwjidx0A = 2*vdwtype[inr+0]; - c6_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A]); - c12_00 = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset0+vdwjidx0A+1]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwNone_GeomP1P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 8 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VF,outeriter*8 + inneriter*35); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Particle-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwNone_GeomP1P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwNone_GeomW3P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3_VF,outeriter*19 + inneriter*108); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwNone_GeomW3P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwNone_GeomW3W3_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W3W3_VF,outeriter*19 + inneriter*315); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water3-Water3 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwNone_GeomW3W3_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset0; - _fjsp_v2r8 ix0,iy0,iz0,fix0,fiy0,fiz0,iq0,isai0; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - _fjsp_v2r8 dx00,dy00,dz00,rsq00,rinv00,rinvsq00,r00,qq00,c6_00,c12_00; - _fjsp_v2r8 dx01,dy01,dz01,rsq01,rinv01,rinvsq01,r01,qq01,c6_01,c12_01; - _fjsp_v2r8 dx02,dy02,dz02,rsq02,rinv02,rinvsq02,r02,qq02,c6_02,c12_02; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq0 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+0])); - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - - jq0 = gmx_fjsp_set1_v2r8(charge[inr+0]); - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - qq00 = _fjsp_mul_v2r8(iq0,jq0); - qq01 = _fjsp_mul_v2r8(iq0,jq1); - qq02 = _fjsp_mul_v2r8(iq0,jq2); - qq10 = _fjsp_mul_v2r8(iq1,jq0); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq20 = _fjsp_mul_v2r8(iq2,jq0); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwNone_GeomW4P1_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4_VF,outeriter*19 + inneriter*108); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Particle - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwNone_GeomW4P1_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx0A,vdwjidx0B; - _fjsp_v2r8 jx0,jy0,jz0,fjx0,fjy0,fjz0,jq0,isaj0; - _fjsp_v2r8 dx10,dy10,dz10,rsq10,rinv10,rinvsq10,r10,qq10,c6_10,c12_10; - _fjsp_v2r8 dx20,dy20,dz20,rsq20,rinv20,rinvsq20,r20,qq20,c6_20,c12_20; - _fjsp_v2r8 dx30,dy30,dz30,rsq30,rinv30,rinvsq30,r30,qq30,c6_30,c12_30; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" - -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: PotentialAndForce - */ -void -nb_kernel_ElecRF_VdwNone_GeomW4W4_VF_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses 19 flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_W4W4_VF,outeriter*19 + inneriter*315); -} -/* - * Gromacs nonbonded kernel: nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - * Electrostatics interaction: ReactionField - * VdW interaction: None - * Geometry: Water4-Water4 - * Calculate force/pot: Force - */ -void -nb_kernel_ElecRF_VdwNone_GeomW4W4_F_sparc64_hpc_ace_double - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - int vdwioffset1; - _fjsp_v2r8 ix1,iy1,iz1,fix1,fiy1,fiz1,iq1,isai1; - int vdwioffset2; - _fjsp_v2r8 ix2,iy2,iz2,fix2,fiy2,fiz2,iq2,isai2; - int vdwioffset3; - _fjsp_v2r8 ix3,iy3,iz3,fix3,fiy3,fiz3,iq3,isai3; - int vdwjidx1A,vdwjidx1B; - _fjsp_v2r8 jx1,jy1,jz1,fjx1,fjy1,fjz1,jq1,isaj1; - int vdwjidx2A,vdwjidx2B; - _fjsp_v2r8 jx2,jy2,jz2,fjx2,fjy2,fjz2,jq2,isaj2; - int vdwjidx3A,vdwjidx3B; - _fjsp_v2r8 jx3,jy3,jz3,fjx3,fjy3,fjz3,jq3,isaj3; - _fjsp_v2r8 dx11,dy11,dz11,rsq11,rinv11,rinvsq11,r11,qq11,c6_11,c12_11; - _fjsp_v2r8 dx12,dy12,dz12,rsq12,rinv12,rinvsq12,r12,qq12,c6_12,c12_12; - _fjsp_v2r8 dx13,dy13,dz13,rsq13,rinv13,rinvsq13,r13,qq13,c6_13,c12_13; - _fjsp_v2r8 dx21,dy21,dz21,rsq21,rinv21,rinvsq21,r21,qq21,c6_21,c12_21; - _fjsp_v2r8 dx22,dy22,dz22,rsq22,rinv22,rinvsq22,r22,qq22,c6_22,c12_22; - _fjsp_v2r8 dx23,dy23,dz23,rsq23,rinv23,rinvsq23,r23,qq23,c6_23,c12_23; - _fjsp_v2r8 dx31,dy31,dz31,rsq31,rinv31,rinvsq31,r31,qq31,c6_31,c12_31; - _fjsp_v2r8 dx32,dy32,dz32,rsq32,rinv32,rinvsq32,r32,qq32,c6_32,c12_32; - _fjsp_v2r8 dx33,dy33,dz33,rsq33,rinv33,rinvsq33,r33,qq33,c6_33,c12_33; - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - iq1 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+1])); - iq2 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+2])); - iq3 = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+3])); - - jq1 = gmx_fjsp_set1_v2r8(charge[inr+1]); - jq2 = gmx_fjsp_set1_v2r8(charge[inr+2]); - jq3 = gmx_fjsp_set1_v2r8(charge[inr+3]); - qq11 = _fjsp_mul_v2r8(iq1,jq1); - qq12 = _fjsp_mul_v2r8(iq1,jq2); - qq13 = _fjsp_mul_v2r8(iq1,jq3); - qq21 = _fjsp_mul_v2r8(iq2,jq1); - qq22 = _fjsp_mul_v2r8(iq2,jq2); - qq23 = _fjsp_mul_v2r8(iq2,jq3); - qq31 = _fjsp_mul_v2r8(iq3,jq1); - qq32 = _fjsp_mul_v2r8(iq3,jq2); - qq33 = _fjsp_mul_v2r8(iq3,jq3); - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidx - -#include "../nb_kernel.h" -#include "gromacs/gmxlib/nrnb.h" - -#include "kernelutil_sparc64_hpc_ace_double.h" -/* #endif */ - -/* ## List of variables set by the generating script: */ -/* ## */ -/* ## Setttings that apply to the entire kernel: */ -/* ## KERNEL_ELEC: String, choice for electrostatic interactions */ -/* ## KERNEL_VDW: String, choice for van der Waals interactions */ -/* ## KERNEL_NAME: String, name of this kernel */ -/* ## KERNEL_VF: String telling if we calculate potential, force, or both */ -/* ## GEOMETRY_I/GEOMETRY_J: String, name of each geometry, e.g. 'Water3' or '1Particle' */ -/* ## */ -/* ## Setttings that apply to particles in the outer (I) or inner (J) loops: */ -/* ## PARTICLES_I[]/ Arrays with lists of i/j particles to use in kernel. It is */ -/* ## PARTICLES_J[]: just [0] for particle geometry, but can be longer for water */ -/* ## PARTICLES_ELEC_I[]/ Arrays with lists of i/j particle that have electrostatics */ -/* ## PARTICLES_ELEC_J[]: interactions that should be calculated in this kernel. */ -/* ## PARTICLES_VDW_I[]/ Arrays with the list of i/j particle that have VdW */ -/* ## PARTICLES_VDW_J[]: interactions that should be calculated in this kernel. */ -/* ## */ -/* ## Setttings for pairs of interactions (e.g. 2nd i particle against 1st j particle) */ -/* ## PAIRS_IJ[]: Array with (i,j) tuples of pairs for which interactions */ -/* ## should be calculated in this kernel. Zero-charge particles */ -/* ## do not have interactions with particles without vdw, and */ -/* ## Vdw-only interactions are not evaluated in a no-vdw-kernel. */ -/* ## INTERACTION_FLAGS[][]: 2D matrix, dimension e.g. 3*3 for water-water interactions. */ -/* ## For each i-j pair, the element [I][J] is a list of strings */ -/* ## defining properties/flags of this interaction. Examples */ -/* ## include 'electrostatics'/'vdw' if that type of interaction */ -/* ## should be evaluated, 'rsq'/'rinv'/'rinvsq' if those values */ -/* ## are needed, and 'exactcutoff' or 'shift','switch' to */ -/* ## decide if the force/potential should be modified. This way */ -/* ## we only calculate values absolutely needed for each case. */ - -/* ## Calculate the size and offset for (merged/interleaved) table data */ - -/* - * Gromacs nonbonded kernel: {KERNEL_NAME} - * Electrostatics interaction: {KERNEL_ELEC} - * VdW interaction: {KERNEL_VDW} - * Geometry: {GEOMETRY_I}-{GEOMETRY_J} - * Calculate force/pot: {KERNEL_VF} - */ -void -{KERNEL_NAME} - (t_nblist * gmx_restrict nlist, - rvec * gmx_restrict xx, - rvec * gmx_restrict ff, - struct t_forcerec * gmx_restrict fr, - t_mdatoms * gmx_restrict mdatoms, - nb_kernel_data_t gmx_unused * gmx_restrict kernel_data, - t_nrnb * gmx_restrict nrnb) -{ - /* ## Not all variables are used for all kernels, but any optimizing compiler fixes that, */ - /* ## so there is no point in going to extremes to exclude variables that are not needed. */ - /* Suffixes 0,1,2,3 refer to particle indices for waters in the inner or outer loop, or - * just 0 for non-waters. - * Suffixes A,B refer to j loop unrolling done with double precision SIMD, e.g. for the two different - * jnr indices corresponding to data put in the four positions in the SIMD register. - */ - int i_shift_offset,i_coord_offset,outeriter,inneriter; - int j_index_start,j_index_end,jidx,nri,inr,ggid,iidx; - int jnrA,jnrB; - int j_coord_offsetA,j_coord_offsetB; - int *iinr,*jindex,*jjnr,*shiftidx,*gid; - real rcutoff_scalar; - real *shiftvec,*fshift,*x,*f; - _fjsp_v2r8 tx,ty,tz,fscal,rcutoff,rcutoff2,jidxall; - /* #for I in PARTICLES_I */ - int vdwioffset{I}; - _fjsp_v2r8 ix{I},iy{I},iz{I},fix{I},fiy{I},fiz{I},iq{I},isai{I}; - /* #endfor */ - /* #for J in PARTICLES_J */ - int vdwjidx{J}A,vdwjidx{J}B; - _fjsp_v2r8 jx{J},jy{J},jz{J},fjx{J},fjy{J},fjz{J},jq{J},isaj{J}; - /* #endfor */ - /* #for I,J in PAIRS_IJ */ - _fjsp_v2r8 dx{I}{J},dy{I}{J},dz{I}{J},rsq{I}{J},rinv{I}{J},rinvsq{I}{J},r{I}{J},qq{I}{J},c6_{I}{J},c12_{I}{J}; - /* #endfor */ - /* #if KERNEL_ELEC != 'None' */ - _fjsp_v2r8 velec,felec,velecsum,facel,crf,krf,krf2; - real *charge; - /* #endif */ - /* #if KERNEL_VDW != 'None' */ - int nvdwtype; - _fjsp_v2r8 rinvsix,rvdw,vvdw,vvdw6,vvdw12,fvdw,fvdw6,fvdw12,vvdwsum,sh_vdw_invrcut6; - int *vdwtype; - real *vdwparam; - _fjsp_v2r8 one_sixth = gmx_fjsp_set1_v2r8(1.0/6.0); - _fjsp_v2r8 one_twelfth = gmx_fjsp_set1_v2r8(1.0/12.0); - /* #endif */ - /* #if 'Table' in KERNEL_ELEC or 'Table' in KERNEL_VDW */ - _fjsp_v2r8 rt,vfeps,vftabscale,Y,F,G,H,Heps,Fp,VV,FF,twovfeps; - real *vftab; - /* #endif */ - /* #if 'LJEwald' in KERNEL_VDW */ - /* #for I,J in PAIRS_IJ */ - _fjsp_v2r8 c6grid_{I}{J}; - /* #endfor */ - real *vdwgridparam; - _fjsp_v2r8 ewclj,ewclj2,ewclj6,ewcljrsq,poly,exponent,f6A,f6B,sh_lj_ewald; - _fjsp_v2r8 one_half = gmx_fjsp_set1_v2r8(0.5); - _fjsp_v2r8 minus_one = gmx_fjsp_set1_v2r8(-1.0); - /* #endif */ - /* #if 'Ewald' in KERNEL_ELEC */ - _fjsp_v2r8 ewtabscale,eweps,sh_ewald,ewrt,ewtabhalfspace,ewtabF,ewtabFn,ewtabD,ewtabV; - real *ewtab; - /* #endif */ - /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */ - _fjsp_v2r8 rswitch,swV3,swV4,swV5,swF2,swF3,swF4,d,d2,sw,dsw; - real rswitch_scalar,d_scalar; - /* #endif */ - _fjsp_v2r8 itab_tmp; - _fjsp_v2r8 dummy_mask,cutoff_mask; - _fjsp_v2r8 one = gmx_fjsp_set1_v2r8(1.0); - _fjsp_v2r8 two = gmx_fjsp_set1_v2r8(2.0); - union { _fjsp_v2r8 simd; long long int i[2]; } vfconv,gbconv,ewconv; - - x = xx[0]; - f = ff[0]; - - nri = nlist->nri; - iinr = nlist->iinr; - jindex = nlist->jindex; - jjnr = nlist->jjnr; - shiftidx = nlist->shift; - gid = nlist->gid; - shiftvec = fr->shift_vec[0]; - fshift = fr->fshift[0]; - /* #if KERNEL_ELEC != 'None' */ - facel = gmx_fjsp_set1_v2r8(fr->ic->epsfac); - charge = mdatoms->chargeA; - /* #if 'ReactionField' in KERNEL_ELEC */ - krf = gmx_fjsp_set1_v2r8(fr->ic->k_rf); - krf2 = gmx_fjsp_set1_v2r8(fr->ic->k_rf*2.0); - crf = gmx_fjsp_set1_v2r8(fr->ic->c_rf); - /* #endif */ - /* #endif */ - /* #if KERNEL_VDW != 'None' */ - nvdwtype = fr->ntype; - vdwparam = fr->nbfp; - vdwtype = mdatoms->typeA; - /* #endif */ - /* #if 'LJEwald' in KERNEL_VDW */ - vdwgridparam = fr->ljpme_c6grid; - sh_lj_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_lj_ewald); - ewclj = gmx_fjsp_set1_v2r8(fr->ic->ewaldcoeff_lj); - ewclj2 = _fjsp_mul_v2r8(minus_one,_fjsp_mul_v2r8(ewclj,ewclj)); - /* #endif */ - - /* #if 'Table' in KERNEL_ELEC and 'Table' in KERNEL_VDW */ - vftab = kernel_data->table_elec_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec_vdw->scale); - /* #elif 'Table' in KERNEL_ELEC */ - vftab = kernel_data->table_elec->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_elec->scale); - /* #elif 'Table' in KERNEL_VDW */ - vftab = kernel_data->table_vdw->data; - vftabscale = gmx_fjsp_set1_v2r8(kernel_data->table_vdw->scale); - /* #endif */ - - /* #if 'Ewald' in KERNEL_ELEC */ - sh_ewald = gmx_fjsp_set1_v2r8(fr->ic->sh_ewald); - /* #if KERNEL_VF=='Force' and KERNEL_MOD_ELEC!='PotentialSwitch' */ - ewtab = fr->ic->tabq_coul_F; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - /* #else */ - ewtab = fr->ic->tabq_coul_FDV0; - ewtabscale = gmx_fjsp_set1_v2r8(fr->ic->tabq_scale); - ewtabhalfspace = gmx_fjsp_set1_v2r8(0.5/fr->ic->tabq_scale); - /* #endif */ - /* #endif */ - - /* #if 'Water' in GEOMETRY_I */ - /* Setup water-specific parameters */ - inr = nlist->iinr[0]; - /* #for I in PARTICLES_ELEC_I */ - iq{I} = _fjsp_mul_v2r8(facel,gmx_fjsp_set1_v2r8(charge[inr+{I}])); - /* #endfor */ - /* #for I in PARTICLES_VDW_I */ - vdwioffset{I} = 2*nvdwtype*vdwtype[inr+{I}]; - /* #endfor */ - /* #endif */ - - /* #if 'Water' in GEOMETRY_J */ - /* #for J in PARTICLES_ELEC_J */ - jq{J} = gmx_fjsp_set1_v2r8(charge[inr+{J}]); - /* #endfor */ - /* #for J in PARTICLES_VDW_J */ - vdwjidx{J}A = 2*vdwtype[inr+{J}]; - /* #endfor */ - /* #for I,J in PAIRS_IJ */ - /* #if 'electrostatics' in INTERACTION_FLAGS[I][J] */ - qq{I}{J} = _fjsp_mul_v2r8(iq{I},jq{J}); - /* #endif */ - /* #if 'vdw' in INTERACTION_FLAGS[I][J] */ - /* #if 'LJEwald' in KERNEL_VDW */ - c6_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]); - c12_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]); - c6grid_{I}{J} = gmx_fjsp_set1_v2r8(vdwgridparam[vdwioffset{I}+vdwjidx{J}A]); - /* #else */ - c6_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A]); - c12_{I}{J} = gmx_fjsp_set1_v2r8(vdwparam[vdwioffset{I}+vdwjidx{J}A+1]); - /* #endif */ - /* #endif */ - /* #endfor */ - /* #endif */ - - /* #if KERNEL_MOD_ELEC!='None' or KERNEL_MOD_VDW!='None' */ - /* #if KERNEL_ELEC!='None' */ - /* When we use explicit cutoffs the value must be identical for elec and VdW, so use elec as an arbitrary choice */ - rcutoff_scalar = fr->ic->rcoulomb; - /* #else */ - rcutoff_scalar = fr->ic->rvdw; - /* #endif */ - rcutoff = gmx_fjsp_set1_v2r8(rcutoff_scalar); - rcutoff2 = _fjsp_mul_v2r8(rcutoff,rcutoff); - /* #endif */ - - /* #if KERNEL_MOD_VDW=='PotentialShift' */ - sh_vdw_invrcut6 = gmx_fjsp_set1_v2r8(fr->ic->sh_invrc6); - rvdw = gmx_fjsp_set1_v2r8(fr->ic->rvdw); - /* #endif */ - - /* #if 'PotentialSwitch' in [KERNEL_MOD_ELEC,KERNEL_MOD_VDW] */ - /* #if KERNEL_MOD_ELEC=='PotentialSwitch' */ - rswitch_scalar = fr->ic->rcoulomb_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* #else */ - rswitch_scalar = fr->ic->rvdw_switch; - rswitch = gmx_fjsp_set1_v2r8(rswitch_scalar); - /* #endif */ - /* Setup switch parameters */ - d_scalar = rcutoff_scalar-rswitch_scalar; - d = gmx_fjsp_set1_v2r8(d_scalar); - swV3 = gmx_fjsp_set1_v2r8(-10.0/(d_scalar*d_scalar*d_scalar)); - swV4 = gmx_fjsp_set1_v2r8( 15.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swV5 = gmx_fjsp_set1_v2r8( -6.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - /* #if 'Force' in KERNEL_VF */ - swF2 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar)); - swF3 = gmx_fjsp_set1_v2r8( 60.0/(d_scalar*d_scalar*d_scalar*d_scalar)); - swF4 = gmx_fjsp_set1_v2r8(-30.0/(d_scalar*d_scalar*d_scalar*d_scalar*d_scalar)); - /* #endif */ - /* #endif */ - - /* Avoid stupid compiler warnings */ - jnrA = jnrB = 0; - j_coord_offsetA = 0; - j_coord_offsetB = 0; - - /* ## Keep track of the floating point operations we issue for reporting! */ - /* #define OUTERFLOPS 0 */ - outeriter = 0; - inneriter = 0; - - /* Start outer loop over neighborlists */ - for(iidx=0; iidxenergygrp_elec+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ - /* #if KERNEL_VDW != 'None' */ - gmx_fjsp_update_1pot_v2r8(vvdwsum,kernel_data->energygrp_vdw+ggid); - /* #define OUTERFLOPS OUTERFLOPS+1 */ - /* #endif */ - /* #endif */ - - /* Increment number of inner iterations */ - inneriter += j_index_end - j_index_start; - - /* Outer loop uses {OUTERFLOPS} flops */ - } - - /* Increment number of outer iterations */ - outeriter += nri; - - /* Update outer/inner flops */ - /* ## NB: This is not important, it just affects the flopcount. However, since our preprocessor is */ - /* ## primitive and replaces aggressively even in strings inside these directives, we need to */ - /* ## assemble the main part of the name (containing KERNEL/ELEC/VDW) directly in the source. */ - /* #if GEOMETRY_I == 'Water3' */ - /* #define ISUFFIX '_W3' */ - /* #elif GEOMETRY_I == 'Water4' */ - /* #define ISUFFIX '_W4' */ - /* #else */ - /* #define ISUFFIX '' */ - /* #endif */ - /* #if GEOMETRY_J == 'Water3' */ - /* #define JSUFFIX 'W3' */ - /* #elif GEOMETRY_J == 'Water4' */ - /* #define JSUFFIX 'W4' */ - /* #else */ - /* #define JSUFFIX '' */ - /* #endif */ - /* #if 'PotentialAndForce' in KERNEL_VF */ - /* #define VFSUFFIX '_VF' */ - /* #elif 'Potential' in KERNEL_VF */ - /* #define VFSUFFIX '_V' */ - /* #else */ - /* #define VFSUFFIX '_F' */ - /* #endif */ - - /* #if KERNEL_ELEC != 'None' and KERNEL_VDW != 'None' */ - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); - /* #elif KERNEL_ELEC != 'None' */ - inc_nrnb(nrnb,eNR_NBKERNEL_ELEC{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); - /* #else */ - inc_nrnb(nrnb,eNR_NBKERNEL_VDW{ISUFFIX}{JSUFFIX}{VFSUFFIX},outeriter*{OUTERFLOPS} + inneriter*{INNERFLOPS}); - /* #endif */ -} diff --git a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp index 0567e1d812..270f9ecf59 100644 --- a/src/gromacs/gmxlib/nonbonded/nonbonded.cpp +++ b/src/gromacs/gmxlib/nonbonded/nonbonded.cpp @@ -98,10 +98,6 @@ #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE # include "gromacs/gmxlib/nonbonded/nb_kernel_avx_256_double/nb_kernel_avx_256_double.h" #endif -#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE -# include "gromacs/gmxlib/nonbonded/nb_kernel_sparc64_hpc_ace_double/nb_kernel_sparc64_hpc_ace_double.h" -#endif - static tMPI_Thread_mutex_t nonbonded_setup_mutex = TMPI_THREAD_MUTEX_INITIALIZER; static gmx_bool nonbonded_setup_done = FALSE; @@ -150,9 +146,6 @@ gmx_nonbonded_setup(t_forcerec * fr, #endif #if (GMX_SIMD_X86_AVX_256 || GMX_SIMD_X86_AVX2_256) && GMX_DOUBLE nb_kernel_list_add_kernels(kernellist_avx_256_double, kernellist_avx_256_double_size); -#endif -#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE - nb_kernel_list_add_kernels(kernellist_sparc64_hpc_ace_double, kernellist_sparc64_hpc_ace_double_size); #endif ; /* empty statement to avoid a completely empty block */ } @@ -215,10 +208,6 @@ gmx_nonbonded_set_kernel_pointers(FILE *log, t_nblist *nl, gmx_bool bElecAndVdwS #if GMX_SIMD_X86_SSE4_1 && GMX_DOUBLE /* No padding - see comment above */ { "sse4_1_double", 1 }, -#endif -#if GMX_SIMD_SPARC64_HPC_ACE && GMX_DOUBLE - /* No padding - see comment above */ - { "sparc64_hpc_ace_double", 1 }, #endif { "c", 1 }, }; -- 2.22.0