Nonbonded group kernels for 256-bit AVX (Intel CPUs)
authorErik Lindahl <erik@kth.se>
Mon, 26 Nov 2012 00:13:26 +0000 (01:13 +0100)
committerRoland Schulz <roland@utk.edu>
Tue, 27 Nov 2012 22:40:57 +0000 (17:40 -0500)
commit51625d2b079f29441e8cac091fffc117cc227114
tree6c421d9ff41ae94ce1872fb6dda05996dbfcb1dd
parent777838f3fb94d6193afa986afb468c1b4367dacd
Nonbonded group kernels for 256-bit AVX (Intel CPUs)

These kernels employ the wider YMM registers (8 floats) for a slight
speedup. For PME we use the analytical correction rather than tables,
just as for the 128-bit AMD-optimized kernels. Kernels are in the ballpark
of 10% faster than SSE4.1 right now when tested on an ion channel. Not
huge, but that's because we don't spend a lot of time in the kernels
anymore.

Change-Id: I1da9b928e2d57223fc7708cc20dd7920771745ab
116 files changed:
include/gmx_x86_avx_256.h
src/gmxlib/gmx_cpuid.c
src/gmxlib/nonbonded/CMakeLists.txt
src/gmxlib/nonbonded/nb_kernel_avx_256_single/kernelutil_x86_avx_256_single.h [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/make_nb_kernel_avx_256_single.py [new file with mode: 0755]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwCSTab_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwLJ_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCSTab_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwCSTab_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwLJ_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecCoul_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwLJSh_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSh_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwLJSw_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEwSw_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwCSTab_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwLJ_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecEw_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecGB_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJSh_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJSw_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecNone_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwCSTab_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSh_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwLJSw_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRFCut_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwCSTab_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwLJ_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomP1P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW3P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW3W3_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW4P1_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_ElecRF_VdwNone_GeomW4W4_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.c [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_avx_256_single.h [new file with mode: 0644]
src/gmxlib/nonbonded/nb_kernel_avx_256_single/nb_kernel_template_avx_256_single.pre [new file with mode: 0644]
src/gmxlib/nonbonded/nonbonded.c